//////////////////////////////////////////////////////////////////////
////                                                              ////
////  or1200_fpu_post_norm_mul                                    ////
////                                                              ////
////  This file is part of the OpenRISC 1200 project              ////
////  http://opencores.org/project,or1k                           ////
////                                                              ////
////  Description                                                 ////
////  post-normalization entity for the multiplication unit       ////
////                                                              ////
////  To Do:                                                      ////
////                                                              ////
////                                                              ////
////  Author(s):                                                  ////
////      - Original design (FPU100) -                            ////
////        Jidan Al-eryani, jidan@gmx.net                        ////
////      - Conv. to Verilog and inclusion in OR1200 -            ////
////        Julius Baxter, julius@opencores.org                   ////
////                                                              ////
//////////////////////////////////////////////////////////////////////
//
//  Copyright (C) 2006, 2010
//
//	This source file may be used and distributed without        
//	restriction provided that this copyright statement is not   
//	removed from the file and that any derivative work contains 
//	the original copyright notice and the associated disclaimer.
//                                                           
//		THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY     
//	EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED   
//	TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS   
//	FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR      
//	OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,         
//	INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES    
//	(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE   
//	GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR        
//	BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF  
//	LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT  
//	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT  
//	OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE         
//	POSSIBILITY OF SUCH DAMAGE. 
//

module or1200_fpu_post_norm_mul(
		     clk_i,
		     opa_i,
		     opb_i,
		     exp_10_i,
		     fract_48_i,
		     sign_i,
		     rmode_i,
		     output_o,
		     ine_o
		     );

   parameter FP_WIDTH = 32;
   parameter MUL_SERIAL = 0; // 0 for parallel multiplier, 1 for serial
   parameter MUL_COUNT = 11; //11 for parallel multiplier, 34 for serial
   parameter FRAC_WIDTH = 23;
   parameter EXP_WIDTH = 8;
   parameter ZERO_VECTOR = 31'd0;
   parameter INF = 31'b1111111100000000000000000000000;
   parameter QNAN = 31'b1111111110000000000000000000000;
   parameter SNAN = 31'b1111111100000000000000000000001;

   input clk_i;
   input [FP_WIDTH-1:0] opa_i;
   input [FP_WIDTH-1:0] opb_i;
   input [EXP_WIDTH+1:0] exp_10_i;
   input [2*FRAC_WIDTH+1:0] fract_48_i;
   input 		    sign_i;
   input [1:0] 		    rmode_i;
   output reg [FP_WIDTH-1:0]    output_o;
   output reg 		    ine_o;

   
   reg [EXP_WIDTH-1:0]     s_expa;
   reg [EXP_WIDTH-1:0]     s_expb;
   reg [EXP_WIDTH+1:0]     s_exp_10_i;
   reg [2*FRAC_WIDTH+1:0]  s_fract_48_i;
   reg 			   s_sign_i;   
   wire [FP_WIDTH-1:0] 	    s_output_o;
   wire 		    s_ine_o;
   wire 		    s_overflow;   
   reg [FP_WIDTH-1:0] 	    s_opa_i;
   reg [FP_WIDTH-1:0] 	    s_opb_i;
   reg [1:0] 		    s_rmode_i;

   reg [5:0] 		    s_zeros;
   wire 		    s_carry;   
   reg [5:0] 		    s_shr2;
   reg [5:0] 		    s_shl2;
   reg [8:0] 		    s_expo1;
   wire [8:0] 		    s_expo2b;
   wire [9:0] 		    s_exp_10a;
   wire [9:0] 		    s_exp_10b;
   reg [47:0] 		    s_frac2a;

   wire 		    s_sticky, s_guard, s_round;   
   wire 		    s_roundup;   
   reg [24:0] 		    s_frac_rnd;
   wire [24:0] 		    s_frac3;
   wire 		    s_shr3;   
   reg [5:0] 		    s_r_zeros;
   wire 		    s_lost;   
   wire 		    s_op_0;   
   wire [8:0] 		    s_expo3;

   wire 		    s_infa, s_infb;   
   wire 		    s_nan_in, s_nan_op, s_nan_a, s_nan_b;   

   
   // Input Register
   always @(posedge clk_i)
     begin
	s_opa_i <= opa_i;
	s_opb_i <= opb_i;	
	s_expa <= opa_i[30:23];
	s_expb <= opb_i[30:23];
	s_exp_10_i <= exp_10_i;
	s_fract_48_i <= fract_48_i;
	s_sign_i <= sign_i;
	s_rmode_i <= rmode_i;
     end

   // Output register
   always @(posedge clk_i)
     begin
	output_o <= s_output_o;
	ine_o	<= s_ine_o;
     end

   //*** Stage 1 ****
   // figure out the exponent and howmuch the fraction has to be shiftd 
   // right/left
	
   assign s_carry = s_fract_48_i[47];
   
   
   always @(posedge clk_i)
     if (!s_fract_48_i[47])
       casez(s_fract_48_i[46:1])	// synopsys full_case parallel_case
	 46'b1?????????????????????????????????????????????: s_zeros <=  0;
	 46'b01????????????????????????????????????????????: s_zeros <=  1;
	 46'b001???????????????????????????????????????????: s_zeros <=  2;
	 46'b0001??????????????????????????????????????????: s_zeros <=  3;
	 46'b00001?????????????????????????????????????????: s_zeros <=  4;
	 46'b000001????????????????????????????????????????: s_zeros <=  5;
	 46'b0000001???????????????????????????????????????: s_zeros <=  6;
	 46'b00000001??????????????????????????????????????: s_zeros <=  7;
	 46'b000000001?????????????????????????????????????: s_zeros <=  8;
	 46'b0000000001????????????????????????????????????: s_zeros <=  9;
	 46'b00000000001???????????????????????????????????: s_zeros <=  10;
	 46'b000000000001??????????????????????????????????: s_zeros <=  11;
	 46'b0000000000001?????????????????????????????????: s_zeros <=  12;
	 46'b00000000000001????????????????????????????????: s_zeros <=  13;
	 46'b000000000000001???????????????????????????????: s_zeros <=  14;
	 46'b0000000000000001??????????????????????????????: s_zeros <=  15;
	 46'b00000000000000001?????????????????????????????: s_zeros <=  16;
	 46'b000000000000000001????????????????????????????: s_zeros <=  17;
	 46'b0000000000000000001???????????????????????????: s_zeros <=  18;
	 46'b00000000000000000001??????????????????????????: s_zeros <=  19;
	 46'b000000000000000000001?????????????????????????: s_zeros <=  20;
	 46'b0000000000000000000001????????????????????????: s_zeros <=  21;
	 46'b00000000000000000000001???????????????????????: s_zeros <=  22;
	 46'b000000000000000000000001??????????????????????: s_zeros <=  23;
	 46'b0000000000000000000000001?????????????????????: s_zeros <=  24;
	 46'b00000000000000000000000001????????????????????: s_zeros <=  25;
	 46'b000000000000000000000000001???????????????????: s_zeros <=  26;
	 46'b0000000000000000000000000001??????????????????: s_zeros <=  27;
	 46'b00000000000000000000000000001?????????????????: s_zeros <=  28;
	 46'b000000000000000000000000000001????????????????: s_zeros <=  29;
	 46'b0000000000000000000000000000001???????????????: s_zeros <=  30;
	 46'b00000000000000000000000000000001??????????????: s_zeros <=  31;
	 46'b000000000000000000000000000000001?????????????: s_zeros <=  32;
	 46'b0000000000000000000000000000000001????????????: s_zeros <=  33;
	 46'b00000000000000000000000000000000001???????????: s_zeros <=  34;
	 46'b000000000000000000000000000000000001??????????: s_zeros <=  35;
	 46'b0000000000000000000000000000000000001?????????: s_zeros <=  36;
	 46'b00000000000000000000000000000000000001????????: s_zeros <=  37;
	 46'b000000000000000000000000000000000000001???????: s_zeros <=  38;
	 46'b0000000000000000000000000000000000000001??????: s_zeros <=  39;
	 46'b00000000000000000000000000000000000000001?????: s_zeros <=  40;
	 46'b000000000000000000000000000000000000000001????: s_zeros <=  41;
	 46'b0000000000000000000000000000000000000000001???: s_zeros <=  42;
	 46'b00000000000000000000000000000000000000000001??: s_zeros <=  43;
	 46'b000000000000000000000000000000000000000000001?: s_zeros <=  44;
	 46'b0000000000000000000000000000000000000000000001: s_zeros <=  45;
	 46'b0000000000000000000000000000000000000000000000: s_zeros <=  46;
       endcase // casex (s_fract_48_i[46:1])
     else
       s_zeros <= 0;


   always @(posedge clk_i)
     casez(s_fract_48_i) // synopsys full_case parallel_case
       48'b???????????????????????????????????????????????1: s_r_zeros <=  0;
       48'b??????????????????????????????????????????????10: s_r_zeros <=  1;
       48'b?????????????????????????????????????????????100: s_r_zeros <=  2;
       48'b????????????????????????????????????????????1000: s_r_zeros <=  3;
       48'b???????????????????????????????????????????10000: s_r_zeros <=  4;
       48'b??????????????????????????????????????????100000: s_r_zeros <=  5;
       48'b?????????????????????????????????????????1000000: s_r_zeros <=  6;
       48'b????????????????????????????????????????10000000: s_r_zeros <=  7;
       48'b???????????????????????????????????????100000000: s_r_zeros <=  8;
       48'b??????????????????????????????????????1000000000: s_r_zeros <=  9;
       48'b?????????????????????????????????????10000000000: s_r_zeros <=  10;
       48'b????????????????????????????????????100000000000: s_r_zeros <=  11;
       48'b???????????????????????????????????1000000000000: s_r_zeros <=  12;
       48'b??????????????????????????????????10000000000000: s_r_zeros <=  13;
       48'b?????????????????????????????????100000000000000: s_r_zeros <=  14;
       48'b????????????????????????????????1000000000000000: s_r_zeros <=  15;
       48'b???????????????????????????????10000000000000000: s_r_zeros <=  16;
       48'b??????????????????????????????100000000000000000: s_r_zeros <=  17;
       48'b?????????????????????????????1000000000000000000: s_r_zeros <=  18;
       48'b????????????????????????????10000000000000000000: s_r_zeros <=  19;
       48'b???????????????????????????100000000000000000000: s_r_zeros <=  20;
       48'b??????????????????????????1000000000000000000000: s_r_zeros <=  21;
       48'b?????????????????????????10000000000000000000000: s_r_zeros <=  22;
       48'b????????????????????????100000000000000000000000: s_r_zeros <=  23;
       48'b???????????????????????1000000000000000000000000: s_r_zeros <=  24;
       48'b??????????????????????10000000000000000000000000: s_r_zeros <=  25;
       48'b?????????????????????100000000000000000000000000: s_r_zeros <=  26;
       48'b????????????????????1000000000000000000000000000: s_r_zeros <=  27;
       48'b???????????????????10000000000000000000000000000: s_r_zeros <=  28;
       48'b??????????????????100000000000000000000000000000: s_r_zeros <=  29;
       48'b?????????????????1000000000000000000000000000000: s_r_zeros <=  30;
       48'b????????????????10000000000000000000000000000000: s_r_zeros <=  31;
       48'b???????????????100000000000000000000000000000000: s_r_zeros <=  32;
       48'b??????????????1000000000000000000000000000000000: s_r_zeros <=  33;
       48'b?????????????10000000000000000000000000000000000: s_r_zeros <=  34;
       48'b????????????100000000000000000000000000000000000: s_r_zeros <=  35;
       48'b???????????1000000000000000000000000000000000000: s_r_zeros <=  36;
       48'b??????????10000000000000000000000000000000000000: s_r_zeros <=  37;
       48'b?????????100000000000000000000000000000000000000: s_r_zeros <=  38;
       48'b????????1000000000000000000000000000000000000000: s_r_zeros <=  39;
       48'b???????10000000000000000000000000000000000000000: s_r_zeros <=  40;
       48'b??????100000000000000000000000000000000000000000: s_r_zeros <=  41;
       48'b?????1000000000000000000000000000000000000000000: s_r_zeros <=  42;
       48'b????10000000000000000000000000000000000000000000: s_r_zeros <=  43;
       48'b???100000000000000000000000000000000000000000000: s_r_zeros <=  44;
       48'b??1000000000000000000000000000000000000000000000: s_r_zeros <=  45;
       48'b?10000000000000000000000000000000000000000000000: s_r_zeros <=  46;
       48'b100000000000000000000000000000000000000000000000: s_r_zeros <=  47;
       48'b000000000000000000000000000000000000000000000000: s_r_zeros <=  48;
     endcase // casex (s_fract_48_i)

   assign s_exp_10a = s_exp_10_i + {9'd0,s_carry};		
   assign s_exp_10b = s_exp_10a - {4'd0,s_zeros};

   wire [9:0] v_shr1;
   wire [9:0] v_shl1;
   
   assign v_shr1 = (s_exp_10a[9] | !(|s_exp_10a)) ?
		   10'd1 - s_exp_10a + {9'd0,s_carry} :
		   (s_exp_10b[9] | !(|s_exp_10b)) ?
		   0 :
		   s_exp_10b[8] ?
		   0 : {9'd0,s_carry};
   
   assign v_shl1 = (s_exp_10a[9] | !(|s_exp_10a)) ?
		   0 :
		   (s_exp_10b[9] | !(|s_exp_10b)) ?
		   {4'd0,s_zeros} - s_exp_10a :
		   s_exp_10b[8] ?
		   0 : {4'd0,s_zeros};
   
		   
   always @(posedge clk_i)
     begin
	if ((s_exp_10a[9] | !(|s_exp_10a)))
	  s_expo1 <= 9'd1;
	else if (s_exp_10b[9] | !(|s_exp_10b))
	  s_expo1 <= 9'd1;
	else if (s_exp_10b[8])
	  s_expo1 <= 9'b011111111;
	else
	  s_expo1 <= s_exp_10b[8:0];
	
	if (v_shr1[6])
	  s_shr2 <= {6{1'b1}};
	else
	  s_shr2 <= v_shr1[5:0];

	s_shl2 <= v_shl1[5:0];
     end // always @ (posedge clk_i)
   	
   // *** Stage 2 ***
   // Shifting the fraction and rounding
		
		
   // shift the fraction
   always @(posedge clk_i)
     if (|s_shr2)
       s_frac2a <= s_fract_48_i >> s_shr2;
     else 
       s_frac2a <= s_fract_48_i << s_shl2; 
	
   assign s_expo2b = s_frac2a[46] ? s_expo1 : s_expo1 - 9'd1;

   // signals if precision was last during the right-shift above
   assign s_lost = (s_shr2 + {5'd0,s_shr3}) > s_r_zeros;
   
   // ***Stage 3***
   // Rounding

   //								   23
   //									|	
   // 			xx00000000000000000000000grsxxxxxxxxxxxxxxxxxxxx
   // guard bit: s_frac2a[23] (LSB of output)
   // round bit: s_frac2a[22]
   assign s_guard = s_frac2a[22];
   assign s_round = s_frac2a[21];
   assign s_sticky = (|s_frac2a[20:0]) | s_lost;
   
   assign s_roundup = s_rmode_i==2'b00 ? // round to nearest even
		      s_guard & ((s_round | s_sticky) | s_frac2a[23]) :
		      s_rmode_i==2'b10 ? // round up
		      (s_guard | s_round | s_sticky) & !s_sign_i :
		      s_rmode_i==2'b11 ? // round down
		      (s_guard | s_round | s_sticky) & s_sign_i : 
		      0; // round to zero(truncate = no rounding)
   
   
   always @(posedge clk_i)
     if (s_roundup)
       s_frac_rnd <= s_frac2a[47:23] + 1; 
     else 
       s_frac_rnd <= s_frac2a[47:23];
	
   assign s_shr3 = s_frac_rnd[24];


   assign s_expo3 = (s_shr3 & (s_expo2b!=9'b011111111)) ? 
		     s_expo2b + 1 : s_expo2b;
   
   assign s_frac3 = (s_shr3 & (s_expo2b!=9'b011111111)) ? 
		     {1'b0,s_frac_rnd[24:1]} : s_frac_rnd; 
	
   //-***Stage 4****
   // Output
		
   assign s_op_0 = !((|s_opa_i[30:0]) & (|s_opb_i[30:0]));
	
   assign s_infa = &s_expa;
   
   assign s_infb = &s_expb;
      
   assign s_nan_a = s_infa & (|s_opa_i[22:0]);
   
   assign s_nan_b = s_infb & (|s_opb_i[22:0]);
   
   assign s_nan_in = s_nan_a | s_nan_b;
   
   assign s_nan_op = (s_infa | s_infb) & s_op_0; // 0 * inf = nan
   
   assign s_overflow = (s_expo3==9'b011111111) & !(s_infa | s_infb);
   
   assign s_ine_o = !s_op_0 & (s_lost | (|s_frac2a[22:0]) | s_overflow);
   
   assign s_output_o = (s_nan_in | s_nan_op) ?
		       {s_sign_i,QNAN} :
		       (s_infa | s_infb) | s_overflow ?
		       {s_sign_i,INF} :
		       s_r_zeros==48 ?
		       {s_sign_i,ZERO_VECTOR} :
		       {s_sign_i,s_expo3[7:0],s_frac3[22:0]};

endmodule // or1200_fpu_post_norm_mul

